This document aims to provide further examples in how to use the hpgltools.
Note to self, the header has rmarkdown::pdf_document instead of html_document or html_vignette because it gets some bullcrap error ‘margins too large’…
Here are the commands I invoke to get ready to play with new data, including everything required to install hpgltools, the software it uses, and the fission data.
library(hpgltools)
tt <- sm(library(fission))
tt <- data(fission)
All the work I do in Dr. El-Sayed’s lab makes some pretty hard assumptions about how data is stored. As a result, to use the fission data set I will do a little bit of shenanigans to match it to the expected format. Now that I have played a little with fission, I think its format is quite nice and am likely to have my experiment class instead be a SummarizedExperiment.
## Extract the meta data from the fission dataset
meta <- as.data.frame(fission@colData)
## Make conditions and batches
meta$condition <- paste(meta$strain, meta$minute, sep=".")
meta$batch <- meta$replicate
meta$sample.id <- rownames(meta)
## Grab the count data
fission_data <- fission@assays$data$counts
## This will make an experiment superclass called 'expt' and it contains
## an ExpressionSet along with any arbitrary additional information one might want to include.
## Along the way it writes a Rdata file which is by default called 'expt.Rdata'
fission_expt <- create_expt(metadata=meta, count_dataframe=fission_data)
## Reading the sample metadata.
## The sample definitions comprises: 36 rows(samples) and 7 columns(metadata fields).
## Matched 7039 annotations and counts.
## Bringing together the count matrix and gene information.
Travis wisely imposes a limit on the amount of time for building vignettes. My tools by default will attempt all possible pairwise comparisons, which takes a long time. Therefore I am going to take a subset of the data and limit these comparisons to that.
fun_data <- subset_expt(fission_expt,
subset="condition=='wt.120'|condition=='wt.30'")
## There were 36, now there are 6 samples.
fun_norm <- sm(normalize_expt(fun_data, batch="limma", norm="quant",
transform="log2", convert="cpm"))
limma_comparison <- sm(limma_pairwise(fun_data))
names(limma_comparison$all_tables)
## [1] "wt30_vs_wt120"
summary(limma_comparison$all_tables$wt30_vs_wt120)
## logFC AveExpr t P.Value
## Min. :-4.278 Min. :-4.58 Min. :-88.48 Min. :0.0000
## 1st Qu.:-0.399 1st Qu.: 1.11 1st Qu.: -2.60 1st Qu.:0.0192
## Median :-0.020 Median : 3.97 Median : -0.13 Median :0.1240
## Mean : 0.008 Mean : 3.11 Mean : -0.17 Mean :0.2792
## 3rd Qu.: 0.300 3rd Qu.: 5.44 3rd Qu.: 1.72 3rd Qu.:0.4653
## Max. : 7.075 Max. :18.59 Max. : 62.44 Max. :1.0000
## adj.P.Val B
## Min. :0.0170 Min. :-8.29
## 1st Qu.:0.0767 1st Qu.:-6.58
## Median :0.2479 Median :-5.50
## Mean :0.3686 Mean :-4.87
## 3rd Qu.:0.6204 3rd Qu.:-3.50
## Max. :1.0000 Max. : 4.83
scatter_wt_mut <- extract_coefficient_scatter(limma_comparison, type="limma",
x="wt30", y="wt120")
## This can do comparisons among the following columns in the pairwise result:
## wt120, wt30
## Actually comparing wt30 and wt120.
scatter_wt_mut$scatter
scatter_wt_mut$both_histogram$plot + ggplot2::scale_y_continuous(limits=c(0, 0.20))
## Warning: Removed 7039 rows containing non-finite values (stat_bin).
## Warning: Removed 7039 rows containing non-finite values (stat_density).
## Warning: Removed 4 rows containing missing values (geom_bar).
## Warning: Removed 1 rows containing missing values (geom_vline).
ma_wt_mut <- extract_de_plots(limma_comparison, type="limma")
ma_wt_mut$ma$plot
ma_wt_mut$volcano$plot
deseq_comparison <- sm(deseq2_pairwise(fun_data))
summary(deseq_comparison$all_tables$wt30_vs_wt120)
## baseMean logFC lfcSE stat
## Min. : 0 Min. :-5.615 Min. :0.000 Min. :-20.800
## 1st Qu.: 28 1st Qu.:-0.386 1st Qu.:0.168 1st Qu.: -1.176
## Median : 192 Median : 0.000 Median :0.222 Median : 0.000
## Mean : 1703 Mean : 0.020 Mean :0.489 Mean : 0.168
## 3rd Qu.: 536 3rd Qu.: 0.343 3rd Qu.:0.412 3rd Qu.: 1.109
## Max. :4924000 Max. : 7.212 Max. :4.072 Max. : 30.370
## P.Value adj.P.Val
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0197 1st Qu.:0.0685
## Median :0.2503 Median :0.4676
## Mean :0.3600 Mean :0.4805
## 3rd Qu.:0.6666 3rd Qu.:0.8732
## Max. :1.0000 Max. :1.0000
scatter_wt_mut <- extract_coefficient_scatter(deseq_comparison, type="deseq",
x="wt30", y="wt120", gvis_filename=NULL)
## This can do comparisons among the following columns in the pairwise result:
## wt120, wt30, r2, r3
## Actually comparing wt30 and wt120.
scatter_wt_mut$scatter
plots_wt_mut <- extract_de_plots(deseq_comparison, type="deseq")
plots_wt_mut$ma$plot
plots_wt_mut$volcano$plot
edger_comparison <- sm(edger_pairwise(fun_data, model_batch=TRUE))
plots_wt_mut <- extract_de_plots(edger_comparison, type="edger")
scatter_wt_mut <- extract_coefficient_scatter(edger_comparison, type="edger",
x="wt30", y="wt120", gvis_filename=NULL)
## This can do comparisons among the following columns in the pairwise result:
## wt120, wt30
## Actually comparing wt30 and wt120.
scatter_wt_mut$scatter
plots_wt_mut$ma$plot
plots_wt_mut$volcano$plot
ebseq_comparison <- sm(ebseq_pairwise(fun_data))
head(ebseq_comparison$all_tables[[1]])
## ebseq_FC logFC ebseq_postfc ebseq_mean PPEE PPDE
## SPAC1002.01 0.5391 -0.89135 0.5554 11.405 0.468257 0.53174
## SPAC1002.02 1.0571 0.08007 1.0567 88.884 0.917728 0.08227
## SPAC1002.03c 0.8580 -0.22094 0.8581 1637.860 0.694695 0.30531
## SPAC1002.04c 1.2573 0.33028 1.2566 223.063 0.568149 0.43185
## SPAC1002.05c 1.6914 0.75825 1.6888 188.029 0.002677 0.99732
## SPAC1002.06c 2.1381 1.09636 1.9456 4.165 0.587048 0.41295
## ebseq_adjp
## SPAC1002.01 0.468257
## SPAC1002.02 0.917728
## SPAC1002.03c 0.694695
## SPAC1002.04c 0.568149
## SPAC1002.05c 0.002677
## SPAC1002.06c 0.587048
basic_comparison <- sm(basic_pairwise(fun_data))
summary(basic_comparison$all_tables$wt30_vs_wt120)
## numerator_median denominator_median numerator_var denominator_var
## Min. :-2.73 Min. :-3.60 Length:5505 Length:5505
## 1st Qu.: 3.31 1st Qu.: 3.31 Class :character Class :character
## Median : 4.65 Median : 4.63 Mode :character Mode :character
## Mean : 4.71 Mean : 4.71
## 3rd Qu.: 5.94 3rd Qu.: 5.93
## Max. :18.61 Max. :18.61
## t p logFC adjp
## Min. :-50.21 Length:5505 Min. :-4.263 Length:5505
## 1st Qu.: -2.10 Class :character 1st Qu.:-0.406 Class :character
## Median : -0.39 Mode :character Median :-0.070 Mode :character
## Mean : -0.16 Mean : 0.008
## 3rd Qu.: 1.53 3rd Qu.: 0.297
## Max. : 49.10 Max. : 7.485
scatter_wt_mut <- extract_coefficient_scatter(basic_comparison, type="basic",
x="wt30", y="wt120", gvis_filename=NULL)
## This can do comparisons among the following columns in the pairwise result:
## wt120, wt30
## Actually comparing wt30 and wt120.
## Did not find wt30 or wt120.
scatter_wt_mut$scatter
## NULL
plots_wt_mut <- extract_de_plots(basic_comparison, type="basic")
plots_wt_mut$ma$plot
plots_wt_mut$volcano$plot
all_comparisons <- sm(all_pairwise(fun_data, model_batch=TRUE, parallel=FALSE))
all_combined <- sm(combine_de_tables(all_comparisons, excel=FALSE))
head(all_combined$data[[1]], n=3)
## deseq_logfc deseq_adjp edger_logfc edger_adjp limma_logfc
## SPAC1002.01 -1.08000 0.3664 -1.05900 0.2201 -0.99860
## SPAC1002.02 -0.01485 0.9816 -0.02342 1.0000 0.03778
## SPAC1002.03c -0.22760 0.2327 -0.23630 0.1598 -0.33910
## limma_adjp basic_nummed basic_denmed basic_numvar
## SPAC1002.01 0.16930 0.000 0.000 0
## SPAC1002.02 0.99460 3.100 2.774 3.603e-01
## SPAC1002.03c 0.02432 6.909 7.248 2.955e-03
## basic_denvar basic_logfc basic_t basic_p basic_adjp
## SPAC1002.01 0 0.000 0.00000 0 0
## SPAC1002.02 2.890e-02 0.326 0.01124 9.919e-01 9.963e-01
## SPAC1002.03c 1.016e-03 -0.339 -9.25600 1.969e-03 3.718e-02
## deseq_basemean deseq_lfcse deseq_stat deseq_p ebseq_fc
## SPAC1002.01 11.15 0.8209 -1.31600 0.1882 0.5391
## SPAC1002.02 87.42 0.3316 -0.04479 0.9643 1.0571
## SPAC1002.03c 1621.00 0.1387 -1.64100 0.1008 0.8580
## ebseq_logfc ebseq_postfc ebseq_mean ebseq_ppee ebseq_ppde
## SPAC1002.01 -0.89135 0.5554 11.41 0.4683 5.317e-01
## SPAC1002.02 0.08007 1.0567 88.88 0.9177 8.227e-02
## SPAC1002.03c -0.22094 0.8581 1637.86 0.6947 3.053e-01
## ebseq_adjp edger_logcpm edger_lr edger_p limma_ave limma_t
## SPAC1002.01 0.4683 0.06691 2.745000 0.09757 -0.1955 -2.8320
## SPAC1002.02 0.9177 2.89400 0.007429 0.93130 2.8470 0.1354
## SPAC1002.03c 0.6947 7.09500 3.399000 0.06522 7.0770 -12.5400
## limma_b limma_p limma_adjp_fdr deseq_adjp_fdr edger_adjp_fdr
## SPAC1002.01 -4.0790 0.07147 1.693e-01 4.186e-01 2.201e-01
## SPAC1002.02 -7.4050 0.90140 9.945e-01 1.000e+00 1.000e+00
## SPAC1002.03c -0.6495 0.00151 2.432e-02 2.682e-01 1.598e-01
## basic_adjp_fdr lfc_meta lfc_var lfc_varbymed p_meta
## SPAC1002.01 0.000e+00 -1.0520000 7.689e-04 -7.305e-04 1.191e-01
## SPAC1002.02 9.954e-01 0.0007106 3.191e-03 4.491e+00 9.323e-01
## SPAC1002.03c 7.607e-03 -0.2681000 2.166e-03 -8.081e-03 5.584e-02
## p_var
## SPAC1002.01 3.753e-03
## SPAC1002.02 9.899e-04
## SPAC1002.03c 2.531e-03
sig_genes <- sm(extract_significant_genes(all_combined, excel=FALSE))
head(sig_genes$limma$ups[[1]], n=3)
## deseq_logfc deseq_adjp edger_logfc edger_adjp limma_logfc
## SPBC2F12.09c 7.212 5.259e-66 7.170 1.264e-180 7.075
## SPAC22A12.17c 5.855 3.969e-19 5.822 3.155e-57 5.609
## SPAPB1A11.02 6.739 1.894e-06 6.483 1.257e-14 5.606
## limma_adjp basic_nummed basic_denmed basic_numvar
## SPBC2F12.09c 0.01847 6.250 -1.235 2.211e-02
## SPAC22A12.17c 0.02447 9.396 4.087 2.236e-02
## SPAPB1A11.02 0.01696 1.491 -3.604 6.869e-01
## basic_denvar basic_logfc basic_t basic_p basic_adjp
## SPBC2F12.09c 5.043e-01 7.485 16.760 2.432e-03 3.928e-02
## SPAC22A12.17c 9.694e-01 5.309 10.080 8.325e-03 6.437e-02
## SPAPB1A11.02 4.981e-01 5.095 7.708 1.687e-03 3.452e-02
## deseq_basemean deseq_lfcse deseq_stat deseq_p ebseq_fc
## SPBC2F12.09c 443.5 0.4123 17.490 1.667e-68 143.63
## SPAC22A12.17c 4289.0 0.6255 9.360 7.945e-21 52.82
## SPAPB1A11.02 21.2 1.2810 5.259 1.447e-07 103.34
## ebseq_logfc ebseq_postfc ebseq_mean ebseq_ppee ebseq_ppde
## SPBC2F12.09c 7.166 132.23 451.03 0 1.000e+00
## SPAC22A12.17c 5.723 52.65 4359.08 0 1.000e+00
## SPAPB1A11.02 6.691 45.38 21.63 0 0.000e+00
## ebseq_adjp edger_logcpm edger_lr edger_p limma_ave
## SPBC2F12.09c 0 5.2210 839.00 1.796e-184 2.592
## SPAC22A12.17c 0 8.4930 264.90 1.479e-59 6.482
## SPAPB1A11.02 0 0.9166 65.83 4.910e-16 -1.192
## limma_t limma_b limma_p limma_adjp_fdr deseq_adjp_fdr
## SPBC2F12.09c 19.36 0.8017 0.0004519 1.847e-02 6.176e-66
## SPAC22A12.17c 12.49 -0.3953 0.0015260 2.447e-02 4.660e-19
## SPAPB1A11.02 27.66 0.2698 0.0001667 1.696e-02 2.224e-06
## edger_adjp_fdr basic_adjp_fdr lfc_meta lfc_var
## SPBC2F12.09c 1.264e-180 9.147e-03 7.152 0.000e+00
## SPAC22A12.17c 3.155e-57 2.609e-02 5.916 1.123e-01
## SPAPB1A11.02 1.257e-14 6.586e-03 6.137 5.880e-02
## lfc_varbymed p_meta p_var
## SPBC2F12.09c 0.000e+00 1.506e-04 6.807e-08
## SPAC22A12.17c 1.898e-02 5.087e-04 7.762e-07
## SPAPB1A11.02 9.581e-03 5.561e-05 9.255e-09
## Here we see that edger and deseq agree the least:
all_comparisons$comparison$comp
## wt30_vs_wt120
## limma_vs_deseq 0.9808
## limma_vs_edger 0.9601
## limma_vs_ebseq 0.7944
## limma_vs_basic 0.9779
## deseq_vs_edger 0.9814
## deseq_vs_ebseq 0.8329
## deseq_vs_basic 0.9777
## edger_vs_ebseq 0.9165
## edger_vs_basic 0.9782
## ebseq_vs_basic 0.9776
## And here we can look at the set of 'significant' genes according to various tools:
yeast_sig <- sm(extract_significant_genes(all_combined, excel=FALSE))
yeast_barplots <- sm(significant_barplots(combined=all_combined))
yeast_barplots$limma
yeast_barplots$edger
yeast_barplots$deseq
Since I didn’t acquire this data in a ‘normal’ way, I am going to post-generate a gff file which may be used by clusterprofiler, topgo, and gostats.
Therefore, I am going to make use of TxDb to make the requisite gff file.
limma_results <- limma_comparison$all_tables
## The set of comparisons performed
names(limma_results)
## [1] "wt30_vs_wt120"
table <- limma_results$wt30_vs_wt120
dim(table)
## [1] 7039 6
gene_names <- rownames(table)
updown_genes <- get_sig_genes(table, p=0.05, lfc=0.4, p_column="P.Value")
## After (adj)p filter, the up genes table has 1190 genes.
## After (adj)p filter, the down genes table has 1424 genes.
## After fold change filter, the up genes table has 962 genes.
## After fold change filter, the down genes table has 1069 genes.
tt <- please_install("GenomicFeatures")
tt <- please_install("biomaRt")
available_marts <- biomaRt::listMarts(host="fungi.ensembl.org")
available_marts
## biomart version
## 1 fungi_mart Ensembl Fungi Genes 41
## 2 fungi_variations Ensembl Fungi Variations 41
ensembl_mart <- biomaRt::useMart("fungi_mart", host="fungi.ensembl.org")
available_datasets <- biomaRt::listDatasets(ensembl_mart)
pombe_hit <- grep(pattern="pombe", x=available_datasets[["description"]])
pombe_name <- available_datasets[pombe_hit, "dataset"]
pombe_mart <- biomaRt::useDataset(pombe_name, mart=ensembl_mart)
pombe_goids <- biomaRt::getBM(attributes=c("pombase_transcript", "go_id"),
values=gene_names, mart=pombe_mart)
colnames(pombe_goids) <- c("ID", "GO")
The above worked, it provided a table of ID and ontology. It was however a bit fraught. Here is another way.
## In theory, the above should work with a single function call:
pombe_goids_simple <- load_biomart_go(species="spombe", overwrite=TRUE,
dl_rows=c("pombase_transcript", "go_id"),
host="fungi.ensembl.org")
## Unable to perform useMart, perhaps the host/mart is incorrect: fungi.ensembl.org ENSEMBL_MART_ENSEMBL.
## The available marts are:
## fungi_martfungi_variations
## Trying the first one.
## Unable to perform useDataset, perhaps the given dataset is incorrect: spombe_gene_ensembl.
## Trying instead to use the dataset: spombe_eg_gene
## That seems to have worked, extracting the resulting annotations.
## Finished downloading ensembl go annotations, saving to spombe_go_annotations.rda.
## Saving ontologies to spombe_go_annotations.rda.
## Finished save().
head(pombe_goids_simple)
## ID GO
## 1 SPRRNA.50.1
## 2 SPNCRNA.1095.1
## 3 SPAC212.11.1 GO:0000784
## 4 SPAC212.11.1 GO:0005634
## 5 SPAC212.11.1 GO:0000166
## 6 SPAC212.11.1 GO:0005524
head(pombe_goids)
## ID GO
## 1 SPRRNA.50.1
## 2 SPNCRNA.1095.1
## 3 SPAC212.11.1 GO:0000784
## 4 SPAC212.11.1 GO:0005634
## 5 SPAC212.11.1 GO:0000166
## 6 SPAC212.11.1 GO:0005524
## This used to work, but does so no longer and I do not know why.
## pombe <- sm(GenomicFeatures::makeTxDbFromBiomart(biomart="fungal_mart",
## dataset="spombe_eg_gene",
## host="fungi.ensembl.org"))
## I bet I can get all this information from ensembl now.
## This was found at the bottom of: https://www.biostars.org/p/232005/
link <- "ftp://ftp.ensemblgenomes.org/pub/release-34/fungi/gff3/schizosaccharomyces_pombe/Schizosaccharomyces_pombe.ASM294v2.34.gff3.gz"
pombe <- GenomicFeatures::makeTxDbFromGFF(link, format="gff3", organism="Schizosaccharomyces pombe",
taxonomyId=4896)
## Import genomic features from the file as a GRanges object ...
## OK
## Prepare the 'metadata' data frame ... OK
## Make the TxDb object ... OK
pombe_transcripts <- as.data.frame(GenomicFeatures::transcriptsBy(pombe))
lengths <- pombe_transcripts[, c("group_name","width")]
colnames(lengths) <- c("ID","width")
## Something useful I didn't notice before:
## makeTranscriptDbFromGFF() ## From GenomicFeatures, much like my own gff2df()
gff_from_txdb <- GenomicFeatures::asGFF(pombe)
## why is GeneID: getting prefixed to the IDs!?
gff_from_txdb$ID <- gsub(x=gff_from_txdb$ID, pattern="GeneID:", replacement="")
written_gff <- rtracklayer::export.gff3(gff_from_txdb, con="pombe.gff")
## Warning in .local(object, con, format, ...): The phase information is missing. The written file will contain
## CDS with no phase information.
summary(updown_genes)
## Length Class Mode
## up_genes 6 data.frame list
## down_genes 6 data.frame list
test_genes <- updown_genes$down_genes
rownames(test_genes) <- paste0(rownames(test_genes), ".1")
lengths$ID <- paste0(lengths$ID, ".1")
goseq_result <- sm(simple_goseq(sig_genes=test_genes, go_db=pombe_goids, length_db=lengths))
head(goseq_result$alldata)
## category over_represented_pvalue under_represented_pvalue
## 371 GO:0005634 9.433e-44 1
## 380 GO:0005730 3.700e-33 1
## 1354 GO:0042254 1.198e-29 1
## 144 GO:0003674 5.976e-28 1
## 382 GO:0005737 4.385e-27 1
## 417 GO:0005829 1.308e-20 1
## numDEInCat numInCat term ontology qvalue
## 371 100 582 nucleus CC 1.765e-40
## 380 35 63 nucleolus CC 3.461e-30
## 1354 26 36 ribosome biogenesis BP 7.473e-27
## 144 77 534 molecular_function MF 2.795e-25
## 382 81 585 cytoplasm CC 1.641e-24
## 417 70 556 cytosol CC 4.079e-18
goseq_result$pvalue_plots$mfp_plot
test_genes <- updown_genes$up_genes
rownames(test_genes) <- paste0(rownames(test_genes), ".1")
goseq_result <- sm(simple_goseq(sig_genes=test_genes, go_db=pombe_goids, length_db=lengths))
head(goseq_result$alldata)
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 697 GO:0008150 4.518e-56 1 128
## 382 GO:0005737 1.306e-50 1 128
## 417 GO:0005829 6.057e-50 1 124
## 371 GO:0005634 4.879e-46 1 122
## 144 GO:0003674 1.268e-44 1 114
## 938 GO:0016020 1.583e-40 1 98
## numInCat term ontology qvalue
## 697 548 biological_process BP 8.453e-53
## 382 585 cytoplasm CC 1.222e-47
## 417 556 cytosol CC 3.778e-47
## 371 582 nucleus CC 2.282e-43
## 144 534 molecular_function MF 4.744e-42
## 938 416 membrane CC 4.936e-38
goseq_result$pvalue_plots$bpp_plot
clusterProfiler really prefers an orgdb instance to use, which is probably smart, as they are pretty nice. Sadly, there is no pre-defined orgdb for pombe…
## holy crap makeOrgPackageFromNCBI is slow, no slower than some of mine, so who am I to complain.
orgdb <- AnnotationForge::makeOrgPackageFromNCBI(
version="0.1", author="atb <abelew@gmail.com>",
maintainer="atb <abelew@gmail.com>", tax_id="4896",
genus="Schizosaccharomyces", species="pombe")
## This created the directory 'org.spombe.eg.db'
devtools::install_local("org.Spombe.eg.db")
library(org.Spombe.eg.db)
## Don't forget to remove the terminal .1 from the gene names...
## If you do forget this, it will fail for no easily visible reason until you remember
## this and get really mad at yourself.
rownames(test_genes) <- gsub(pattern=".1$", replacement="", x=rownames(test_genes))
pombe_goids[["ID"]] <- gsub(pattern=".1$", replacement="", x=pombe_goids[["ID"]])
cp_result <- simple_clusterprofiler(sig_genes=test_genes, do_david=FALSE, do_gsea=FALSE,
de_table=all_combined$data[[1]],
orgdb=org.Spombe.eg.db, orgdb_to="ALIAS")
cp_result[["pvalue_plots"]][["ego_all_mf"]]
## Yay bar plots!
## Get rid of those stupid terminal .1s.
rownames(test_genes) <- gsub(pattern=".1$", replacement="", x=rownames(test_genes))
pombe_goids[["ID"]] <- gsub(pattern=".1$", replacement="", x=pombe_goids[["ID"]])
tp_result <- sm(simple_topgo(sig_genes=test_genes, go_db=pombe_goids, pval_column="limma_adjp"))
tp_result[["pvalue_plots"]][["mfp_plot_over"]]
tp_result[["pvalue_plots"]][["bpp_plot_over"]]
## Get rid of those stupid terminal .1s.
rownames(test_genes) <- gsub(pattern=".1$", replacement="", x=rownames(test_genes))
pombe_goids[["ID"]] <- gsub(pattern=".1$", replacement="", x=pombe_goids[["ID"]])
## universe_merge is the column in the final data frame when.
## gff_type is the field in the gff file providing the id, this may be redundant with
## universe merge, that is something to check on...
gst_result <- sm(simple_gostats(sig_genes=test_genes, go_db=pombe_goids, universe_merge="id",
gff_type="gene",
gff="pombe.gff", pval_column="limma_adjp"))
pander::pander(sessionInfo())
R version 3.5.1 (2018-07-02)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C
attached base packages: stats4, parallel, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: edgeR(v.3.22.5), variancePartition(v.1.10.4), fission(v.0.114.0), SummarizedExperiment(v.1.10.1), DelayedArray(v.0.6.6), BiocParallel(v.1.14.2), matrixStats(v.0.54.0), GenomicRanges(v.1.32.7), GenomeInfoDb(v.1.16.0), IRanges(v.2.14.12), S4Vectors(v.0.18.3), hpgltools(v.2018.11), foreach(v.1.4.4), ruv(v.0.9.7), bindrcpp(v.0.2.2), Biobase(v.2.40.0) and BiocGenerics(v.0.26.0)
loaded via a namespace (and not attached): SparseM(v.1.77), rtracklayer(v.1.40.6), pkgmaker(v.0.27), tidyr(v.0.8.2), ggplot2(v.3.1.0), acepack(v.1.4.1), bit64(v.0.9-7), knitr(v.1.20), data.table(v.1.11.8), rpart(v.4.1-13), RCurl(v.1.95-4.11), doParallel(v.1.0.14), snow(v.0.4-3), GenomicFeatures(v.1.32.3), preprocessCore(v.1.42.0), callr(v.3.0.0), cowplot(v.0.9.3), usethis(v.1.4.0), RSQLite(v.2.1.1), bit(v.1.1-14), enrichplot(v.1.0.2), xml2(v.1.2.0), httpuv(v.1.4.5), assertthat(v.0.2.0), viridis(v.0.5.1), hms(v.0.4.2), evaluate(v.0.12), promises(v.1.0.1), DEoptimR(v.1.0-8), progress(v.1.2.0), caTools(v.1.17.1.1), igraph(v.1.2.2), DBI(v.1.0.0), geneplotter(v.1.58.0), htmlwidgets(v.1.3), purrr(v.0.2.5), crosstalk(v.1.0.0), selectr(v.0.4-1), dplyr(v.0.7.8), backports(v.1.1.2), annotate(v.1.58.0), biomaRt(v.2.36.1), blockmodeling(v.0.3.1), remotes(v.2.0.2), withr(v.2.1.2), ggforce(v.0.1.3), packrat(v.0.4.9-3), robustbase(v.0.93-3), checkmate(v.1.8.5), GenomicAlignments(v.1.16.0), prettyunits(v.1.0.2), cluster(v.2.0.7-1), DOSE(v.3.6.1), lazyeval(v.0.2.1), crayon(v.1.3.4), genefilter(v.1.62.0), pkgconfig(v.2.0.2), labeling(v.0.3), units(v.0.6-1), tweenr(v.1.0.0), nlme(v.3.1-137), pkgload(v.1.0.2), nnet(v.7.3-12), devtools(v.2.0.1), bindr(v.0.1.1), rlang(v.0.3.0.1), registry(v.0.5), doSNOW(v.1.0.16), directlabels(v.2018.05.22), rprojroot(v.1.3-2), graph(v.1.58.2), rngtools(v.1.3.1), Matrix(v.1.2-15), base64enc(v.0.1-3), geneLenDataBase(v.1.16.0), ggridges(v.0.5.1), processx(v.3.2.0), viridisLite(v.0.3.0), bitops(v.1.0-6), KernSmooth(v.2.23-15), pander(v.0.6.3), Biostrings(v.2.48.0), EBSeq(v.1.20.0), blob(v.1.1.1), doRNG(v.1.7.1), stringr(v.1.3.1), qvalue(v.2.12.0), readr(v.1.1.1), scales(v.1.0.0), memoise(v.1.1.0), magrittr(v.1.5), plyr(v.1.8.4), gplots(v.3.0.1), bibtex(v.0.4.2), gdata(v.2.18.0), zlibbioc(v.1.26.0), compiler(v.3.5.1), RColorBrewer(v.1.1-2), lme4(v.1.1-19), DESeq2(v.1.20.0), Rsamtools(v.1.32.3), cli(v.1.0.1), ade4(v.1.7-13), XVector(v.0.20.0), ps(v.1.2.1), htmlTable(v.1.12), Formula(v.1.2-3), MASS(v.7.3-51.1), mgcv(v.1.8-25), tidyselect(v.0.2.5), stringi(v.1.2.4), highr(v.0.7), yaml(v.2.2.0), GOSemSim(v.2.6.2), locfit(v.1.5-9.1), latticeExtra(v.0.6-28), ggrepel(v.0.8.0), grid(v.3.5.1), fastmatch(v.1.1-0), tools(v.3.5.1), rstudioapi(v.0.8), foreign(v.0.8-71), gridExtra(v.2.3), farver(v.1.0), Rtsne(v.0.15), ggraph(v.1.0.2), digest(v.0.6.18), rvcheck(v.0.1.1), shiny(v.1.2.0), quadprog(v.1.5-5), Rcpp(v.1.0.0), later(v.0.7.5), httr(v.1.3.1), AnnotationDbi(v.1.42.1), genoPlotR(v.0.8.7), colorspace(v.1.3-2), rvest(v.0.3.2), XML(v.3.98-1.16), fs(v.1.2.6), topGO(v.2.32.0), splines(v.3.5.1), RBGL(v.1.56.0), plotly(v.4.8.0), sessioninfo(v.1.1.1), xtable(v.1.8-3), jsonlite(v.1.5), nloptr(v.1.2.1), corpcor(v.1.6.9), UpSetR(v.1.3.3), testthat(v.2.0.1), R6(v.2.3.0), Vennerable(v.3.1.0.9000), Hmisc(v.4.1-1), pillar(v.1.3.0), htmltools(v.0.3.6), mime(v.0.6), glue(v.1.3.0), minqa(v.1.2.4), clusterProfiler(v.3.8.1), DESeq(v.1.32.0), codetools(v.0.2-15), fgsea(v.1.6.0), pkgbuild(v.1.0.2), lattice(v.0.20-38), tibble(v.1.4.2), sva(v.3.28.0), pbkrtest(v.0.4-7), curl(v.3.2), BiasedUrn(v.1.07), colorRamps(v.2.3), gtools(v.3.8.1), zip(v.1.0.0), GO.db(v.3.6.0), openxlsx(v.4.1.0), survival(v.2.43-1), limma(v.3.36.5), rmarkdown(v.1.10), desc(v.1.2.0), munsell(v.0.5.0), DO.db(v.2.9), GenomeInfoDbData(v.1.1.0), iterators(v.1.0.10), goseq(v.1.32.0), reshape2(v.1.4.3) and gtable(v.0.2.0)